MarkovRankSolver.java example

Explorer

damp.ekeko.snippets-master
- damp.ekeko.snippets.plugin
  - src
    - damp
      - ekeko
        snippets
        BoundDirective.java
        DirectiveOperandBinding.java
        EkekoSnippetsPlugin.java
        ExtractedSnippet.java
        NaiveASTFlattener.java
        OperatorOperandBinding.java
        SnippetBaseListener.java
        SnippetBaseVisitor.java
        SnippetExtractor.java
        SnippetLexer.java
        SnippetListener.java
        SnippetParser.java
        SnippetVisitor.java
        data
        SnippetOperator.java
        TemplateGroup.java
        geneticsearch
        PartialJavaProjectModel.java
        gui
        BoundDirectivesEditorDialog.java
        BoundDirectivesViewer.java
        ChartCanvas.java
        ClojureFileEditorInput.java
        DirectiveOperandBindingEditingSupport.java
        DirectiveOperandBindingLabelProviderValue.java
        DirectiveSelectionDialog.java
        IntendedResultsEditor.java
        IntendedResultsEditorCommandHandler.java
        IntendedResultsEditorInput.java
        IntendedResultsEditorPersistableElementFactory.java
        MutationHistoryDialog.java
        OperandBindingLabelProviderDescription.java
        OperatorOperandBindingEditingSupport.java
        OperatorOperandBindingLabelProviderValue.java
        OperatorOperandsView.java
        OperatorOperandsViewer.java
        OperatorTreeContentProvider.java
        OperatorTreeLabelProvider.java
        PopulationInspectorDialog.java
        QueryInspectorDialog.java
        RecommendationEditor.java
        RecommendationEditorCommandHandler.java
        RecommendationEditorInput.java
        RecommendationEditorPersistableElementFactory.java
        RewritesTemplateEditor.java
        SubjectsTemplateEditor.java
        TemplateCodeGenerator.java
        TemplateEditor.java
        TemplateEditorActionBarContributor.java
        TemplateEditorCommandHandler.java
        TemplateEditorInput.java
        TemplateEditorPersistableElementFactory.java
        TemplateGroupNodeSelectionDialog.java
        TemplateGroupTemplateElement.java
        TemplateGroupViewer.java
        TemplateGroupViewerNodeDoubleClickListener.java
        TemplateGroupViewerNodeSelectionEvent.java
        TemplateGroupViewerNodeSelectionListener.java
        TemplatePrettyPrinter.java
        TemplateTreeContentProvider.java
        TemplateTreeLabelProviders.java
        TransformationEditor.java
        TransformationEditorActionBarContributor.java
        TransformationEditorCommandHandler.java
        TransformationEditorInput.java
        TransformationEditorPersistableElementFactory.java
        TransformationOverviewEditor.java
    - ec
      - util
        MersenneTwister.java
- damp.ekeko.snippets.plugin.test
  - resources
  - src
    - test
      - damp
        ekeko
        snippets
        EkekoSnippetsTest.java
        experiments
        GeneticSearchTest.java

/* Copyright (c) 2003 The Nutch Organization.  All rights reserved.   */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.quality;

import java.util.*;

/**
 * Merges several orderings of items into one full ranking.
 *
 * <p>Callers register orderings with {@link #addOrdering(Object[])}, run
 * {@link #solveRanking()} once, and then query positions with
 * {@link #getPos(Object)}.  The stated goal of the original author is a
 * full ranking that keeps the Kendall Tau distance to each component
 * ranking small.  The implementation builds a "majority prefers" transition
 * matrix over all distinct items, power-iterates a Markov chain over it,
 * and repeatedly peels off the states that collect the probability mass.
 *
 * <p>Items are stored in sorted collections using their natural ordering,
 * so all items handed to one solver must be mutually comparable.
 *
 * @author Mike Cafarella
 */
public class MarkovRankSolver {

    /** Usage text printed by {@link #main(String[])} for an unusable command line. */
    private static final String USAGE =
        "Usage: java net.nutch.quality.MarkovRankSolver <maxStates> [-seed <seed>]";

    // Every ordering passed to addOrdering(), each stored as an Object[].
    Vector orderings = new Vector();
    // Becomes true when solveRanking() has run to completion.
    boolean solved = false;
    // Maps each ranked item to its Integer position in the full ranking (0 = first).
    TreeMap fullRanking = new TreeMap();

    /**
     * Creates a solver with no orderings.  Add orderings with
     * {@link #addOrdering(Object[])} and then call {@link #solveRanking()}.
     */
    public MarkovRankSolver() {
    }

    /**
     * Adds an ordering of items to the solver's working set.  Index 0 of
     * the array is the best-ranked item.  May be called any number of
     * times before {@link #solveRanking()}.
     *
     * <p>The array is stored by reference, not copied.  Calling this after
     * {@link #solveRanking()} does not invalidate the existing ranking;
     * call {@link #solveRanking()} again to take the new ordering into account.
     *
     * @param ordering the items, best first
     */
    public void addOrdering(Object[] ordering) {
        orderings.add(ordering);
    }

    /**
     * Computes the full ranking from all orderings added so far and
     * stores it for {@link #getPos(Object)}.
     *
     * <p>Solving allocates an N x N byte matrix, where N is the number of
     * distinct items across all orderings.  Keep this in mind!
     */
    public void solveRanking() {
        //
        // 1.  Collect the distinct items ("states") in sorted order and,
        //     for every ordering, a map from item to its position.
        //
        TreeSet stateSet = new TreeSet();
        Vector allPositions = new Vector();

        for (Enumeration e = orderings.elements(); e.hasMoreElements(); ) {
            Object ordering[] = (Object[]) e.nextElement();
            TreeMap curItemPositions = new TreeMap();

            for (int i = 0; i < ordering.length; i++) {
                stateSet.add(ordering[i]);
                // An item repeated within one ordering keeps its last position.
                curItemPositions.put(ordering[i], new Integer(i));
            }
            allPositions.add(curItemPositions);
        }
        Object states[] = stateSet.toArray();

        //
        // 2.  Build the connectivity matrix.  Each cell holds "1" or "0";
        //     Java zero-initialises it.
        //
        byte transitions[][] = new byte[states.length][states.length];

        //
        // 3.  Visit each cell in the lower-left triangle and fill in its
        //     dual in the upper-right at the same time.
        //
        for (int i = 0; i < states.length; i++) {
            for (int j = 0; j < i; j++) {

                // Count the orderings that contain both items and rank
                // one strictly ahead of the other.
                int iBetter = 0, jBetter = 0;
                for (Enumeration e = allPositions.elements(); e.hasMoreElements(); ) {
                    TreeMap curItemPositions = (TreeMap) e.nextElement();
                    Integer posI = (Integer) curItemPositions.get(states[i]);
                    Integer posJ = (Integer) curItemPositions.get(states[j]);

                    if (posI != null && posJ != null) {
                        if (posI.intValue() < posJ.intValue()) {
                            iBetter++;
                        } else if (posI.intValue() > posJ.intValue()) {
                            jBetter++;
                        }
                    }
                }

                //
                // A "1" at [x][y] means "transit from x to y", and we
                // transit from the item the majority ranks worse to the
                // item it ranks better.  With no majority (a tie, or no
                // ordering containing both items) both cells stay 0.
                // Unequal non-negative counts imply at least one vote, so
                // the inequality alone is the majority test.
                //
                if (iBetter != jBetter) {
                    transitions[i][j] = (iBetter < jBetter) ? (byte) 1 : (byte) 0;
                    transitions[j][i] = (byte) (1 - transitions[i][j]);
                }
            }
        }

        //
        // Bookkeeping for the peel-off loop.  removedState[] starts out
        // all false (the Java default).
        //
        float lastStateDist[] = new float[states.length];
        // final because the anonymous Comparator below reads it.
        final float curStateDist[] = new float[states.length];
        int numTransitions[] = new int[states.length];
        int totalStates = states.length, numSortedStates = 0;
        boolean removedState[] = new boolean[states.length];

        //
        // Loop until every item is ranked.  Each pass ranks at least one
        // state (see step 8), so the loop terminates.
        //
        while (numSortedStates < totalStates) {
            //
            // 4.  Count the outgoing transitions of each remaining state,
            //     considering only remaining targets.
            //
            for (int i = 0; i < states.length; i++) {
                numTransitions[i] = 0;

                if (! removedState[i]) {
                    for (int j = 0; j < states.length; j++) {
                        if (! removedState[j]) {
                            numTransitions[i] += transitions[i][j];
                        }
                    }
                }
            }

            //
            // 5.  A remaining state with zero outgoing transitions is a
            //     sink.  Give it a self-loop so the probability that
            //     reaches it stays there.
            //
            for (int i = 0; i < numTransitions.length; i++) {
                if (! removedState[i] && numTransitions[i] == 0) {
                    transitions[i][i] = 1;
                    numTransitions[i] = 1;
                }
            }

            //
            // Start from a uniform distribution over the remaining states.
            // Entries of removed states are written too, but are never
            // read as a source below.
            //
            float uniform = (1.0f / (totalStates - numSortedStates));
            for (int i = 0; i < states.length; i++) {
                lastStateDist[i] = uniform;
                curStateDist[i] = uniform;
            }

            //
            // 6.  Approximate the stationary distribution iteratively.
            //
            // REMIND - mjc - in the future we'd like to stop
            // iterating based on convergence criteria rather than
            // a hard-coded number of loops
            //
            for (int k = 0; k < (2 * states.length); k++) {

                // For every target state....
                for (int i = 0; i < states.length; i++) {
                    // Init target state's likelihood to zero.
                    curStateDist[i] = 0;

                    // Iterate through every source state...
                    if (! removedState[i]) {
                        for (int j = 0; j < states.length; j++) {
                            //
                            // A source that transits to this target hands
                            // over an equal share of its own likelihood.
                            //
                            if (! removedState[j] && transitions[j][i] == 1) {
                                curStateDist[i] += (lastStateDist[j] / numTransitions[j]);
                            }
                        }
                    }
                }

                // Now copy the "cur" values to the "last" values.
                System.arraycopy(curStateDist, 0, lastStateDist, 0, curStateDist.length);
            }

            //
            // 7.  Sort the remaining states by descending likelihood.
            //
            // The comparator reports equal scores as equal, so the TreeSet
            // keeps only the first state added (the lowest index) from
            // each group of tied states.  The dropped states stay
            // unremoved and are ranked by a later pass of the enclosing
            // loop.  This is deliberate legacy behaviour; adding a
            // tie-break would change the resulting rankings.
            //
            TreeSet stateSorter = new TreeSet(new Comparator() {
                public int compare(Object o1, Object o2) {
                    double score1 = curStateDist[((Integer) o1).intValue()];
                    double score2 = curStateDist[((Integer) o2).intValue()];
                    if (score1 > score2) {
                        return -1;
                    } else if (score1 == score2) {
                        return 0;
                    } else {
                        return 1;
                    }
                }
            });

            for (int i = 0; i < states.length; i++) {
                if (! removedState[i]) {
                    stateSorter.add(new Integer(i));
                }
            }

            //
            // 8.  Move the top-ranked states into the result until we hit
            //     an "end-of-sinks" break, then restart the Markov-solving
            //     on what is left.
            //
            int numStatesMoved = 0;

            for (Iterator it = stateSorter.iterator(); it.hasNext(); ) {
                int index = ((Integer) it.next()).intValue();
                float rating = curStateDist[index];

                //
                // A very large drop in rating is taken to mean we have
                // left the sink region.  The first state of a pass is
                // always moved (numStatesMoved == 0), which guarantees
                // progress.  The threshold uses numSortedStates, which
                // grows while states are moved in this same loop.
                //
                if ((numStatesMoved > 0) &&
                    (rating == 0 || rating < ((1.0 / (totalStates - numSortedStates)) / 10000.0))) {
                    break;
                } else {
                    fullRanking.put(states[index], new Integer(numSortedStates));
                    numSortedStates++;
                    removedState[index] = true;
                    numStatesMoved++;
                }
            }
        }
        solved = true;
    }

    /**
     * Returns the position of the given item in the full ranking.
     *
     * @param obj an item that appeared in at least one added ordering
     * @return the zero-based position, 0 being the top of the ranking
     * @throws IllegalArgumentException if {@link #solveRanking()} has not
     *         been called yet, or if the item is not in the ranking
     */
    public int getPos(Object obj) {
        if (! solved) {
            throw new IllegalArgumentException("Must call solveRanking() first.");
        }
        Integer pos = (Integer) fullRanking.get(obj);
        if (pos == null) {
            throw new IllegalArgumentException("Unknown item obj.");
        }
        return pos.intValue();
    }

    /**
     * Returns the number of items in the full ranking.
     *
     * @return the number of ranked items; 0 before {@link #solveRanking()}
     */
    public int getNumStates() {
        return fullRanking.size();
    }

    /**
     * Computes the Kendall Tau distance between the given list and the
     * full ranking: the number of item pairs that the list orders one way
     * and the full ranking orders the other way.
     * Must be called after {@link #solveRanking()}.
     *
     * @param testList   items in the order to test; every item that takes
     *                   part in a pair must be in the full ranking
     * @param normalized if true, divide the count by the number of pairs
     *                   (giving 0 when there are no pairs)
     * @return the raw count of mis-ordered pairs, or that count as a
     *         fraction of all pairs when {@code normalized} is true
     * @throws IllegalArgumentException if {@link #solveRanking()} has not
     *         been called yet, or a compared item is not in the ranking
     */
    public double getKendallTauDistance(Object testList[], boolean normalized) {
        if (! solved) {
            throw new IllegalArgumentException("Must call solveRanking() first.");
        }

        int misOrderings = 0, maxOrderings = 0;
        //
        // Go through all pairs of elts in the testList and see whether
        // they are consistent with the full ranking.  The outer loop stops
        // one short of the end because the last element starts no pair;
        // that lets us look up the first element's position once per row
        // without looking up anything the pairwise walk would not.
        //
        for (int i = 0; i < testList.length - 1; i++) {
            int pos1 = getPos(testList[i]);
            for (int j = i + 1; j < testList.length; j++) {
                if (pos1 > getPos(testList[j])) {
                    misOrderings++;
                }
                maxOrderings++;
            }
        }

        if (! normalized) {
            return misOrderings;
        }
        if (maxOrderings == 0) {
            return 0;
        }
        return (double) misOrderings / maxOrderings;
    }

    /**
     * Tests the rank-solver from the command line.  Builds ten
     * slightly-shuffled orderings of the integers {@code 0..maxStates-1}
     * plus the identity ordering, solves, and prints each item with its
     * final position.
     *
     * @param argv {@code <maxStates> [-seed <seed>]}
     * @throws NumberFormatException if {@code maxStates} or the seed is
     *         not a valid number
     */
    public static void main(String argv[]) throws NumberFormatException {
        if (argv.length < 1) {
            System.out.println(USAGE);
            return;
        }

        int maxStates = Integer.parseInt(argv[0]);
        if (maxStates < 0) {
            // A negative size cannot be used to allocate the state array.
            System.out.println(USAGE);
            return;
        }

        long seed = new Random().nextInt();
        boolean usedSeed = false;

        for (int i = 1; i < argv.length; i++) {
            if ("-seed".equals(argv[i])) {
                if (i + 1 >= argv.length) {
                    // "-seed" is the last argument: there is no value to read.
                    System.out.println(USAGE);
                    return;
                }
                seed = Long.parseLong(argv[++i]);
                usedSeed = true;
            }
        }

        if (usedSeed) {
            System.out.println("Using seed: " + seed);
        } else {
            System.out.println("Seed: " + seed);
        }

        //
        // Finally, create the markov-model solver
        //
        MarkovRankSolver solver = new MarkovRankSolver();
        Random rand = new Random(seed);

        // Build state set
        Integer states[] = new Integer[maxStates];
        for (int i = 0; i < states.length; i++) {
            states[i] = new Integer(i);
        }

        // Build 10 slightly-different orderings
        for (int i = 0; i < 10; i++) {
            Integer ordering[] = new Integer[maxStates];
            for (int j = 0; j < ordering.length; j++) {
                // Shift by a random 0 or 1.  The low bit gives the same
                // value as Math.abs(x) % 2 for every int, without relying
                // on Math.abs(Integer.MIN_VALUE).
                ordering[j] = states[(j + (rand.nextInt() & 1)) % ordering.length];
                System.out.print(ordering[j] + " ");
            }
            System.out.println();
            solver.addOrdering(ordering);
        }

        // And a very boring one that contains each item
        Integer identityOrdering[] = new Integer[states.length];
        System.arraycopy(states, 0, identityOrdering, 0, states.length);
        solver.addOrdering(identityOrdering);

        System.out.println("About to solve problem...");
        solver.solveRanking();

        System.out.println("-----------------------------------");
        for (int i = 0; i < states.length; i++) {
            System.out.println(states[i] + "\t\t" + solver.getPos(states[i]));
        }
    }
}